# -*- coding: utf-8 -*-
import scrapy
from guahaowang.items import GuahaowangItem

class GuahaoSpider(scrapy.Spider):
    """Crawl hospital listings on guahao.com and scrape each hospital page.

    Flow: ``start_requests`` requests every listing page, ``parse_item``
    follows each hospital link found on a listing page, and ``parse``
    turns a hospital detail page into a ``GuahaowangItem``.
    """

    name = 'guahao'
    allowed_domains = ['guahao.com']
    # NOTE: not used for scheduling, because start_requests() is overridden
    # below; kept so the attribute is still there for anything that reads it.
    start_urls = ['https://www.guahao.com/hospital/areahospitals?q=&pi=all&p=%E5%85%A8%E5%9B%BD&ci=all&c=%E4%B8%8D%E9%99%90&o=all&hl=all&ht=all&hk=&fg=0&ipIsShanghai=false&sort=region_sort']

    # Listing URL (query is filtered to p=Hunan, c=Changsha); the page
    # number is appended to the trailing "pageNo=".
    list_url = 'https://www.guahao.com/hospital/areahospitals?p=%E6%B9%96%E5%8D%97&q=&fg=0&c=%E9%95%BF%E6%B2%99&hk=&hl=all&ipIsShanghai=false&ci=254&pi=30&sort=region_sort&ht=all&o=all&pageNo='

    # Number of listing pages to request (pages 1..page_count inclusive).
    page_count = 6

    # XPath of the anchors on a listing page that point to hospital pages.
    hospital_link_xpath = '//*[@id="g-cfg"]/div[1]/div[3]/ul/li/a/@href'

    # Common prefix of every field XPath on a hospital detail page.
    detail_root_xpath = '//*[@id="g-cfg"]/div[1]/section'

    # (item key, XPath relative to detail_root_xpath) for each scraped field.
    detail_fields = (
        # hospital name
        ('hospital', '/div[1]/div[2]/h1/strong/a/text()'),
        # hospital grade
        ('grade', '/div[1]/div[2]/h1/span/text()'),
        # hospital address
        ('address', '/div[1]/div[2]/div[1]/span/text()'),
        # hospital phone number
        ('tel_num', '/div[1]/div[2]/div[2]/span/text()'),
        # official website
        ('website', '/div[1]/div[2]/div[3]/span/text()'),
        # introduction
        ('about', '/div[1]/div[2]/div[4]/span/text()'),
        # appointment volume
        ('yu_yue_liang', '/div[2]/div[2]/div/p[1]/strong[1]/text()'),
        # guide service
        ('dao_yi_fu_wu', '/div[2]/div[2]/div/p[1]/strong[2]/text()'),
        # patient rating
        ('huan_zhe_ping_jia', '/div[2]/div[2]/div/p[2]/strong[1]/text()'),
        # waiting time
        ('hou_zhen_shi_jian', '/div[2]/div[2]/div/p[2]/strong[2]/text()'),
    )

    def start_requests(self):
        """Yield one request per listing page, handled by ``parse_item``."""
        # int() tolerates page_count being overridden with a string value.
        for page in range(1, int(self.page_count) + 1):
            url = self.list_url + str(page)
            self.logger.info("%s", url)
            yield scrapy.Request(url, callback=self.parse_item)

    def parse_item(self, response):
        """Yield a request for every hospital link on a listing page."""
        for href in response.xpath(self.hospital_link_xpath).extract():
            # Resolve against the page URL: Request() rejects URLs without
            # a scheme, so a relative href would otherwise raise ValueError.
            yield scrapy.Request(response.urljoin(href), callback=self.parse)

    def parse(self, response):
        """Build a ``GuahaowangItem`` from a hospital detail page.

        Every field holds the list returned by ``.extract()`` for its
        XPath, which is empty when nothing on the page matches.
        """
        item = GuahaowangItem()

        self.logger.info("load....URL:%s", response.url)

        for field, relative_xpath in self.detail_fields:
            item[field] = response.xpath(
                self.detail_root_xpath + relative_xpath).extract()

        return item